In [1]:
import pandas as pd
import plotly.express as px
import numpy as np
import plotly.graph_objects as go
import re
/Users/lettyuy/opt/anaconda3/lib/python3.9/site-packages/scipy/__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.25.0 is required for this version of SciPy (detected version 1.26.0
  warnings.warn(f"A NumPy version >={np_minversion} and <{np_maxversion}"
In [2]:
# Load the Billboard Hot 100 export that sits next to the notebook.
df = pd.read_csv("Hot 100.csv")

# Parse the two date columns into datetimes so the .dt accessor works on them.
for date_column in ('chart_date', 'chart_debut'):
    df[date_column] = pd.to_datetime(df[date_column])

# Calendar year of each chart week; later cells group and animate on it.
df['chart_year'] = df['chart_date'].dt.year

df.head()
Out[2]:
chart_position chart_date song performer song_id instance time_on_chart consecutive_weeks previous_week peak_position worst_position chart_debut chart_url chart_year
0 84 1990-05-05 "B" Girls Young And Restless "B" GirlsYoung And Restless 1.0 1 NaN NaN 84 84 1990-05-05 https://www.billboard.com/charts/hot-100/1990-... 1990
1 78 1990-05-12 "B" Girls Young And Restless "B" GirlsYoung And Restless 1.0 2 1.0 84.0 78 84 1990-05-05 https://www.billboard.com/charts/hot-100/1990-... 1990
2 68 1990-05-19 "B" Girls Young And Restless "B" GirlsYoung And Restless 1.0 3 2.0 78.0 68 84 1990-05-05 https://www.billboard.com/charts/hot-100/1990-... 1990
3 60 1990-05-26 "B" Girls Young And Restless "B" GirlsYoung And Restless 1.0 4 3.0 68.0 60 84 1990-05-05 https://www.billboard.com/charts/hot-100/1990-... 1990
4 58 1990-06-02 "B" Girls Young And Restless "B" GirlsYoung And Restless 1.0 5 4.0 60.0 58 84 1990-05-05 https://www.billboard.com/charts/hot-100/1990-... 1990
In [3]:
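# Earlier attempt, kept for reference only (not executed): superseded by the re.split cell below.
# It overwrote `performer` itself and its str.split pattern was case-sensitive.
#df['performer'] = df['performer'].str.split(',|&| and | featuring | feat\. | ft\. ')
#df = df.explode('performer')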
In [4]:
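# Second attempt, kept for reference only (not executed): superseded by the re.split cell below.
# It wrote to `individual_artist` but still split case-sensitively, so e.g. " Featuring " was not a separator.
#df['individual_artist'] = df['performer'].str.split(',|&| and | featuring | feat\. | ft\. ')
#df = df.explode('individual_artist')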
In [5]:
# Separators that join several credited acts inside one `performer` string.
# Compiled once (instead of once per row) and case-insensitive, so
# "Featuring" / "FEAT." split the same way as their lowercase forms.
_ARTIST_SEPARATORS = re.compile(r',|&| and | featuring | feat\. | ft\. ', flags=re.IGNORECASE)


def _split_performer(performer):
    """Return the individual artist names credited in one `performer` value.

    Each name is stripped of surrounding whitespace. Blank fragments, which
    appear when two separators are adjacent (e.g. "A, & B" or "A, and B"),
    are dropped so they do not turn into empty-string "artists".

    A non-string value (such as NaN) yields an empty list instead of raising
    TypeError; `DataFrame.explode` turns an empty list into a single NaN row.
    """
    if not isinstance(performer, str):
        return []
    fragments = (fragment.strip() for fragment in _ARTIST_SEPARATORS.split(performer))
    return [fragment for fragment in fragments if fragment]


# One row per (chart entry, individual artist).
# NOTE: this cell reassigns `df`, so running it twice without re-running the
# load cell splits the already-exploded rows again and multiplies them.
df['individual_artist'] = df['performer'].apply(_split_performer)
df = df.explode('individual_artist')
In [6]:
# An artist needs at least this many distinct #1 songs to count as having
# "staying power". One-hit wonders have exactly one, so artists with exactly
# two #1 songs belong to neither cohort.
STAYING_POWER_MIN_HITS = 3

# Average chart position of every (song, artist) pair, rounded to a whole rank.
avg_chart_positions = (
    df.groupby(['song', 'individual_artist'])['chart_position']
    .mean()
    .round()
    .astype(int)
    .reset_index()
    .rename(columns={'chart_position': 'avg_chart_position'})
)

# Drop any `avg_chart_position` left behind by an earlier run of this cell.
# Without this, re-running the cell makes the merge emit
# avg_chart_position_x / avg_chart_position_y and later cells can no longer
# find the column they expect.
df = pd.merge(
    df.drop(columns=['avg_chart_position'], errors='ignore'),
    avg_chart_positions,
    on=['song', 'individual_artist'],
    how='left',
)

# Every chart week on which some song sat at #1.
df_at_1 = df[df['chart_position'] == 1]

# One row per (artist, #1 song); counting those rows per artist gives the
# number of distinct #1 songs each artist has had.
unique_songs_at_1 = df_at_1.groupby(['individual_artist', 'song']).size().reset_index().rename(columns={0: 'count'})
individual_artist_hits = unique_songs_at_1.groupby('individual_artist').size()

# First #1 row, in `df` order, of every (song, artist) pair. Because the artist
# is part of the de-duplication key, filtering by artist after de-duplicating
# selects the same rows as filtering first.
first_week_at_1 = df_at_1.drop_duplicates(subset=['song', 'individual_artist'])

# Cohort 1: artists with exactly one #1 song.
one_hit_artists_list = individual_artist_hits[individual_artist_hits == 1].index.tolist()
# .copy() gives the cohort its own data, so adding columns to it later does
# not trigger SettingWithCopyWarning.
df_one_hit_wonders = first_week_at_1[first_week_at_1['individual_artist'].isin(one_hit_artists_list)].copy()

# Cohort 2: artists with at least STAYING_POWER_MIN_HITS distinct #1 songs.
artists_with_staying_power_list = individual_artist_hits[individual_artist_hits >= STAYING_POWER_MIN_HITS].index.tolist()
df_artists_with_staying_power = first_week_at_1[first_week_at_1['individual_artist'].isin(artists_with_staying_power_list)].copy()
In [7]:
def get_top_10_per_year(group, n=10, column='consecutive_weeks'):
    """Return the rows of `group` with the largest values in `column`.

    Written to be passed to `DataFrame.groupby(...).apply`, which calls it
    once per group.

    Parameters
    ----------
    group : pandas.DataFrame
        The rows of a single group (one chart year in this notebook).
    n : int, default 10
        Maximum number of rows to keep. Fewer are returned when the group
        has fewer than `n` rows.
    column : str, default 'consecutive_weeks'
        Column whose largest values decide which rows are kept.

    Returns
    -------
    pandas.DataFrame
        Up to `n` rows of `group`, ordered by `column` descending.
    """
    return group.nlargest(n, column)

# Label every row with its cohort so the two remain distinguishable once stacked.
for cohort_frame, cohort_label in (
    (df_one_hit_wonders, 'One Hit Wonders'),
    (df_artists_with_staying_power, 'Artists with Staying Power'),
):
    cohort_frame['Source'] = cohort_label

# Within each chart year keep only the rows selected by get_top_10_per_year.
top_10_one_hit_wonders_yearly = (
    df_one_hit_wonders
    .groupby('chart_year')
    .apply(get_top_10_per_year)
    .reset_index(drop=True)
)
top_10_staying_power_yearly = (
    df_artists_with_staying_power
    .groupby('chart_year')
    .apply(get_top_10_per_year)
    .reset_index(drop=True)
)

# Stack both cohorts and switch to the display-friendly column names used by the plot.
display_names = {
    'time_on_chart': 'Time on Chart',
    'avg_chart_position': 'Average Chart Position',
    'song': 'Song',
}
top_10_combined_yearly = pd.concat(
    [top_10_one_hit_wonders_yearly, top_10_staying_power_yearly]
).rename(columns=display_names)
In [8]:
# Sort the years explicitly. unique() returns them in order of first appearance,
# and the combined frame lists every one-hit-wonder year before the years that
# only have staying-power rows. The expanded data is built by looping over this
# sequence, and the animation follows the order in which years are first seen,
# so an unsorted sequence can put frames out of chronological order.
all_years = sorted(top_10_combined_yearly['chart_year'].unique())
# Cohort labels, in the order they should first appear in the expanded data.
all_sources = ["One Hit Wonders", "Artists with Staying Power"]
In [9]:
# Build one record list covering every (year, cohort) combination. A combination
# with no real rows gets a single placeholder record with NaN coordinates, so
# each year still contains an entry for both cohorts.
expanded_data = []

for year in all_years:
    in_year = top_10_combined_yearly['chart_year'] == year
    for source_name in all_sources:
        rows = top_10_combined_yearly[in_year & (top_10_combined_yearly['Source'] == source_name)]
        if not rows.empty:
            # Real data exists for this combination: keep every row as-is.
            expanded_data.extend(rows.to_dict('records'))
            continue
        # Nothing for this cohort in this year: add the NaN placeholder.
        placeholder = {
            'chart_year': year,
            'Source': source_name,
            'Average Chart Position': np.nan,
            'Time on Chart': np.nan,
            'individual_artist': f'Placeholder {source_name} {year}',
        }
        expanded_data.append(placeholder)

expanded_df = pd.DataFrame(expanded_data)
In [10]:
# Axis extents, computed once and shared by the plot ranges and the guide lines.
x_worst = top_10_combined_yearly['Average Chart Position'].max()
x_best = top_10_combined_yearly['Average Chart Position'].min()
y_top = top_10_combined_yearly['Time on Chart'].max()

# Animated scatter: one frame per chart year, one point per artist row.
fig = px.scatter(
    expanded_df,
    x="Average Chart Position",
    y="Time on Chart",
    animation_frame="chart_year",
    animation_group="individual_artist",
    hover_name="individual_artist",
    hover_data={"Song": True, "Source": False, "chart_year": False},
    color="Source",
    size_max=55,  # only takes effect when a `size=` column is also supplied
    # Reversed range (max -> min): numerically smaller, i.e. better, positions sit on the right.
    range_x=[x_worst, x_best],
    range_y=[0, y_top],
)

# Where the dashed guide lines cross, splitting the plot into four quadrants.
x_mid = 30
y_mid = 30

guide_line = dict(color="Black", dash="dash", width=0.5)

# Vertical guide at x_mid spanning the full y range.
fig.add_shape(
    go.layout.Shape(
        type="line",
        x0=x_mid,
        x1=x_mid,
        y0=0,
        y1=y_top,
        line=guide_line,
    )
)

# Horizontal guide at y_mid spanning the full x range.
fig.add_shape(
    go.layout.Shape(
        type="line",
        x0=x_worst,
        x1=x_best,
        y0=y_mid,
        y1=y_mid,
        line=guide_line,
    )
)

# Author credit in the top-right corner of the plotting area (paper coordinates).
# It is added once; a second identical annotation used to be drawn on top of it.
fig.add_annotation(
    text="Letty Uy - CIS 9655",
    xref="paper",
    yref="paper",
    x=1,
    y=1,
    showarrow=False,
    font=dict(size=12, color="black"),
)

# One label per quadrant, placed half of x_mid / y_mid away from the crossing point.
quadrant_labels = [
    ("Lower average rank, long duration", x_mid + (x_mid / 2), y_mid + (y_mid / 2)),
    ("Higher average rank, long duration", x_mid - (x_mid / 2), y_mid + (y_mid / 2)),
    ("Higher average rank, short duration", x_mid - (x_mid / 2), y_mid - (y_mid / 2)),
    ("Lower average rank, short duration", x_mid + (x_mid / 2), y_mid - (y_mid / 2)),
]
for label_text, label_x, label_y in quadrant_labels:
    fig.add_annotation(text=label_text, x=label_x, y=label_y, showarrow=False)

fig.update_layout(
    xaxis_title="Average Chart Position",
    # The y axis plots the "Time on Chart" column, not a count of weeks spent at #1.
    yaxis_title="Time on Chart (weeks)"
)

fig.show()